import json
import pandas as pd

# Load the CHPO spreadsheet and build a lookup from its 'id' column to its
# 'cn' column (presumably HPO term id -> Chinese term name; confirm against
# the spreadsheet).
chpo_file = '/Users/zhonghua/data/hpo/chpo-2017-07-29.xlsx'
chpo_df = pd.read_excel(chpo_file)
# pandas represents empty cells as NaN, and NaN is truthy, so a blank 'cn'
# cell would pass an ``if cn_name:`` check and be rendered as the text "nan".
# Leave such rows out of the lookup; ``chpo_df`` itself is kept unfiltered.
_chpo_complete = chpo_df.dropna(subset=['id', 'cn'])
hpo_dict = dict(zip(_chpo_complete['id'], _chpo_complete['cn']))
def _get_chpo(hpo_id):
    """Look up *hpo_id* in the module-level ``hpo_dict``.

    Returns the stored value, or ``None`` when the id has no entry.
    """
    try:
        return hpo_dict[hpo_id]
    except KeyError:
        return None
    
def _term_2_dict(term_str, selectd_key=('id', 'name', 'is_a')):
    """Parse the body of one OBO ``[Term]`` stanza into a tree-node dict.

    Args:
        term_str: The text following a ``[Term]`` header, one ``tag: value``
            pair per line. Only the first ``:`` on a line separates tag from
            value. Parsing stops at the next stanza header (a line starting
            with ``[``), so a trailing ``[Typedef]`` section after the last
            term cannot overwrite that term's own ``id`` / ``name``.
        selectd_key: Tags to keep; all other tags are ignored. Must contain
            ``'id'`` and ``'name'``, which are needed to build the label.

    Returns:
        A dict with:
          * ``'id'``   - the term id,
          * ``'text'`` - ``"<id> (<name>)"``, or ``"<id> (<name>; <cn>)"``
            when ``_get_chpo`` returns a truthy value for the id,
          * ``'is_a'`` - list of raw ``is_a`` values in file order (only
            present when the stanza has at least one).
        The ``'name'`` entry is folded into ``'text'`` and removed.

    Raises:
        KeyError: if no ``id`` or ``name`` tag was collected.
    """
    d = {}
    for line in term_str.split('\n'):
        if line.strip().startswith('['):
            # Start of the next stanza (e.g. ``[Typedef]``): this term is done.
            break
        key, sep, value = line.partition(':')
        if not sep or not key:
            # No colon on the line, or nothing before it: not a tag line.
            continue
        key = key.strip()
        value = value.strip()
        if key not in selectd_key:
            continue
        if key == 'is_a':
            # A term may have several parents; collect all of them.
            d.setdefault('is_a', []).append(value)
        else:
            d[key] = value
    cn_name = _get_chpo(d['id'])
    name = d.pop('name')
    if cn_name:
        d['text'] = '{0} ({1}; {2})'.format(d['id'], name, cn_name)
    else:
        d['text'] = '{0} ({1})'.format(d['id'], name)
    return d


def _get_hpo(hpo_li, hpo_id):
    """Return the first dict in *hpo_li* whose ``'id'`` equals *hpo_id*.

    Returns ``None`` when no element matches.
    """
    matching = (node for node in hpo_li if node['id'] == hpo_id)
    return next(matching, None)


def obo_2_jstree(obo_file):
    """Read an OBO file and link its terms into a parent/children tree.

    Every ``[Term]`` stanza is parsed with ``_term_2_dict``. Each term is then
    appended to the ``'children'`` list of every parent named in its ``is_a``
    entries (the parent id is the part of the entry before ``!``), and the
    ``'is_a'`` key is removed from the term.

    Args:
        obo_file: Path to the ``.obo`` file; it is read as UTF-8.

    Returns:
        A list with one dict per term, in file order. Child dicts are shared
        by reference: a term with several parents appears under each of them,
        and every term also remains in the returned top-level list. An
        ``is_a`` entry whose parent id is not defined in the file is skipped.
    """
    with open(obo_file, encoding='utf-8') as obo_handle:
        # The text before the first ``[Term]`` header is the file header.
        term_li = obo_handle.read().split('[Term]\n')[1:]
    term_dict_li = [_term_2_dict(x) for x in term_li]

    # Index terms by id once instead of scanning the list for every parent.
    # ``setdefault`` keeps the first term for a duplicated id, matching a
    # first-match linear search.
    term_by_id = {}
    for term in term_dict_li:
        term_by_id.setdefault(term['id'], term)

    for term in term_dict_li:
        for parent in term.pop('is_a', ()):
            hpo_id = parent.split('!')[0].strip()
            parent_hpo = term_by_id.get(hpo_id)
            if parent_hpo is None:
                # Parent is not defined in this file; nothing to attach to.
                continue
            parent_hpo.setdefault('children', []).append(term)
    return term_dict_li


# Convert the HPO ontology file to JSON: write the children of the first term
# in the file (presumably the ontology root - confirm against hp.obo).
hp_obo = '/Users/zhonghua/data/hpo/hp.obo'
hp_json = '/Users/zhonghua/data/hpo/hpo.json'
# Build the tree before opening the output so that a parsing failure does not
# truncate an existing JSON file.
jstree_nodes = obo_2_jstree(hp_obo)[0]['children']
# The context manager guarantees the output is flushed and closed.
with open(hp_json, 'w') as json_out:
    json.dump(jstree_nodes, json_out)
